*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file summarizes the geography of prime locations across US MSAs

* Load data
	u "$temp/grid_PL_output_p${SL}/grid_PL_output_p${SL}__all.dta", clear

* PL count
	egen PL_count=max(PLrankByCity), by(cbsafp)
	
* PL area share
	gen grid_area = (0.25)^2
	label var grid_area "Area in sq. km"
	egen AllPL_area = sum(area) if PL == 1, by(cbsafp)
	egen metro_area = sum(area) if developable == 1,  by(cbsafp)
	gen PL_area_pct = AllPL_area/metro_area*100
	
* PL emp share
	egen AllPL_emp = sum(emp) if PL == 1, by(cbsafp)
	gen PL_emp_pct =  AllPL_emp / metro_emp * 100

* PL emp shares
	foreach name in MFG NT PS TS O  {
		egen AllPL_`name'_emp = sum(cell_`name'_emp) if PL == 1, by(cbsafp)
		egen metro_`name'_emp = sum(cell_`name'_emp) if developable == 1, by(cbsafp)
		gen PL_`name'_emp_pct =  AllPL_`name'_emp / metro_`name'_emp * 100		
	}
	
* Generate "large CBSA" identifier	
	gen large_CBSA = metro_emp >= 1000000
	
* Condense to CBSA level
	collapse (first)  metro_name (mean) PL_count PL_area_pct PL_emp_pct PL_*_emp_pct metro_area metro_emp metro_*_emp, by(cbsafp large_CBSA)

* Label variables
	label var PL_count "Number of prime locations"
	label var PL_area "PL share of area (%)"
	label var PL_emp "PL share of total employment (%)"
	label var PL_TS_emp "PL share of tradable services employment (%)"
	label var PL_MFG_emp "PL share of manufacturing employment (%)"
	label var PL_NT_emp "PL share of non-tradable services employment (%)"
	label var PL_PS_emp "PL share of public services employment (%)"
	label var PL_O_emp "PL share of other employment (%)"
	replace metro_emp = metro_emp / 1000
	label var metro_emp "Total grid employment (in 1000)"
	label var metro_area "Developable grid area (in sq. km)"
	
* Illustrate distributions
	sum PL_count if large_CBSA== 0
	local mean_small = r(mean)
	sum PL_count if large_CBSA== 1
	local mean_large = r(mean)
	twoway (hist PL_count if  large_CBSA== 0 , color(blue%50) start(0.5) width(1)) ///
		   (hist PL_count if large_CBSA== 1  , color(red%50) start(0.50) width(1)) /// 
			, graphregion(color(white)) name(count, replace) legend(order(1 "CBSA employment <= 1M" 2 "CBSA employment > 1M") pos(6) cols(2) region(lstyle(none))) 	///
			xlabel(1[1]10)  xline(`mean_small',lcolor(blue) lpattern(shortdash))  xline(`mean_large',lcolor(red) lpattern(longdash))
	sum PL_area_pct  if large_CBSA== 0
	local mean_small = r(mean)
	sum PL_area_pct if large_CBSA== 1
	local mean_large = r(mean)
	twoway (hist PL_area_pct if  large_CBSA== 0 , color(blue%50) start(0) width(0.025)) ///
		   (hist PL_area_pct if large_CBSA== 1  , color(red%50) start(0) width(0.025)) /// 
			, graphregion(color(white)) name(area, replace)	 xline(`mean_small',lcolor(blue) lpattern(shortdash))  xline(`mean_large',lcolor(red) lpattern(longdash))
	sum PL_emp_pct  if large_CBSA== 0
	local mean_small = r(mean)
	sum PL_emp_pct if large_CBSA== 1
	local mean_large = r(mean)	
	twoway (hist PL_emp_pct if  large_CBSA== 0 , color(blue%50) start(0) width(1.5)) ///
		   (hist PL_emp_pct if large_CBSA== 1  , color(red%50) start(0) width(1.5)) /// 
			, graphregion(color(white)) name(emp, replace)	 legend(nobox)  xline(`mean_small',lcolor(blue) lpattern(shortdash))  xline(`mean_large',lcolor(red) lpattern(longdash))
	sum PL_TS_emp  if large_CBSA== 0
	local mean_small = r(mean)
	sum PL_TS_emp if large_CBSA== 1
	local mean_large = r(mean)
	twoway (hist PL_TS_emp if  large_CBSA== 0 , color(blue%50) start(0) width(2.5)) ///
		   (hist PL_TS_emp if large_CBSA== 1  , color(red%50) start(0) width(2.5)) /// 
			, graphregion(color(white)) name(TS_emp, replace)	legend(nobox)  xline(`mean_small',lcolor(blue) lpattern(shortdash))  xline(`mean_large',lcolor(red) lpattern(longdash))
	grc1leg count area emp TS_emp, graphregion(color(white)) graphregion(color(white)) xsize(10) ysize(5) cols(2) 
* Export Figure B.2.2 (figure for preferred p-value will be chosen later) 
	capture mkdir "$temp/figures_App"
	capture mkdir "$temp/figures_App/US-CBSAs"
	capture mkdir "$temp/figures_App/US-CBSAs/p${SL}"
	graph export  "$temp/figures_App/US-CBSAs/p${SL}/FIG_B2_2_US-CBSA-histograms.pdf", replace
	
* Provocative correlations *****************************************************
	gen lmetro_emp = ln(metro_emp)
	label var lmetro_emp "Log employment"
	reg PL_emp_pct lmetro_emp, robust
	
* Define simple program for easy illustration
	capture program drop SCATTER	
	program define SCATTER	
		local ylab: variable label `1'
		reg `1' `2' , robust
			local b_all = round(_b[`2'],0.01)
			local b_all_ft : display %3.2f `b_all'
			local se_all = round(_se[`2'],0.01)
			local se_all_ft : display %3.2f `se_all'
		reg `1' `2' if  large_CBSA== 0, robust
			local b_small = round(_b[`2'],0.01)
			local b_small_ft : display %3.2f `b_small'
			local se_small = round(_se[`2'],0.01)
			local se_small_ft : display %3.2f `se_small'
		reg `1' `2' if  large_CBSA== 1, robust
			local b_large = round(_b[`2'],0.01)
			local b_large_ft : display %3.2f `b_large'
			local se_large = round(_se[`2'],0.01)
			local se_large_ft : display %3.2f `se_large'
		twoway 	(scatter `1' `2' if  large_CBSA== 0 , mcolor(blue%25) mlcolor(blue%0) msize(small))  (lfit `1' `2' if  large_CBSA== 0, lcolor(blue) lpattern(shortdash)) ///
			(scatter `1' `2' if  large_CBSA== 1 , mcolor(red%25) mlcolor(red%0) msize(small) msymbol(S)) (lfit `1' `2' if  large_CBSA== 1 , lcolor(red) lpattern(longdash)) ///
			, legend(order(1 "CBSA employment <= 1M" 3 "CBSA employment > 1M") pos(6) cols(2) region(lstyle(none))) name(`3', replace) ///
			ytitle("`ylab'") `4' `5' `6' `7' `8' ///
			note("dy/dx =  `b_all_ft' (`se_all_ft')"  "dy/dx | small = `b_small_ft' (`se_small_ft')" "dy/dx | large = `b_large_ft' (`se_large_ft')") graphregion(color(white))
	end
	
* Generate figures
	sum PL_count
	local PLmax = r(max)
	SCATTER PL_count lmetro_emp "num_pop" "" "ylabel(1[1]`PLmax')"
	SCATTER PL_emp_pct PL_count "share_num" "xlabel(1[1]`PLmax')" 
	SCATTER PL_emp_pct lmetro_emp "share_pop" ""
	grc1leg num_pop share_pop share_num , graphregion(color(white)) scale(0.8) cols(3) xsize(10) ysize(5) graphregion(margin(0 0 0 0))

* Export Figure 2, panel a (figure for preferred p-value will be chosen later) 
	capture mkdir "$temp/figures"
	capture mkdir "$temp/figures/US-CBSAs"
	capture mkdir "$temp/figures/US-CBSAs/p${SL}"
	graph export  "$temp/figures/US-CBSAs/p${SL}/FIG_2a_US-CBSA-correlations.pdf", replace	
	grc1leg num_pop share_pop share_num , graphregion(color(white)) scale(1.25) cols(3)  xsize(12) ysize(4) graphregion(margin(0 0 0 0))
	graph export  "$temp/figures/US-CBSAs/p${SL}/FIG_2a_US-CBSA-correlations.png", replace  height(600) width(1800)
	
	
* Merge additional data for more provicative correlations
	merge 1:1 cbsafp using "$root/_Data/US METRO DATA/Housing/cbsa_hpi.dta" // Merge house price index
	drop _m
	merge 1:1 cbsafp using "$root/_Data/US METRO DATA/LandUseRegulation/WRLURI2019_CBSA.dta", keepusing(WRLURI18_mean) // Merge land use regulation index
	drop if _m == 2
	drop _m
	merge 1:1 cbsafp using "$root/_Data/US METRO DATA/HeightGap/cbsa-AdjHeightGIS.dta", keepusing(AdjHeight) // Merge Height gaps
	drop if _m == 2
	drop _m
	merge 1:1 cbsafp using "$root/_Data/US METRO DATA/HistoricMfg/cbsa_mfgshare1940.dta" // Merge historic manufacturing share_num
	drop if _m == 2
	drop _m	
	
* Save data for later generation 
	capture mkdir "$temp/US-CBSAs"
	capture mkdir "$temp/US-CBSAs/p${SL}"	
	qui save "$temp/US-CBSAs/p${SL}/MSA_UsingData.dta", replace
	
* Effect of historic MFG share on city structure	
	eststo:  reg  PL_count mfgshare1940, robust
	eststo:  reg  PL_count mfgshare1940 lmetro_emp, robust
	eststo:  reg  PL_count mfgshare1940 lmetro_emp if metro_emp >= 1000, robust
	
	eststo:  reg  PL_emp_pct mfgshare1940, robust
	eststo:  reg  PL_emp_pct mfgshare1940 lmetro_emp, robust
	eststo:  reg  PL_emp_pct mfgshare1940 lmetro_emp if metro_emp >= 1000, robust	
	
* Export Table B.2.1 (figure for preferred p-value will be chosen later) 
	capture mkdir "$temp/tables_App"
	capture mkdir "$temp/tables_App/US-CBSAs"
	capture mkdir "$temp/tables_App/US-CBSAs/p${SL}"
	esttab  using "$temp/tables_App/US-CBSAs/p${SL}/TAB_B2_1_HistoricMFGshare.tex", replace b(3) se(4) label compress   r2(3) ///
			stats( N r2 , fmt(%18.3g )   labels(  `"Observations"' `"\(R^{2}\)"' ))  ///   stats(Country_effects IV N r2) 0.* "`City'"
			title("Prime location geography and historic manufacturing share") modelwidth(6) nogap nostar  addnote( "Unit of observation is MSA." )	
			eststo clear	
			
* Effect of land use regulation on city structure	
	eststo:  reg  PL_count WRLURI18_mean, robust
	eststo:  reg  PL_count WRLURI18_mean lmetro_emp, robust
	eststo:  reg  PL_count WRLURI18_mean lmetro_emp if metro_emp >= 1000, robust
	
	eststo:  reg  PL_emp_pct WRLURI18_mean, robust
	eststo:  reg  PL_emp_pct WRLURI18_mean lmetro_emp, robust
	eststo:  reg  PL_emp_pct WRLURI18_mean lmetro_emp if metro_emp >= 1000, robust	

* Export Table B.2.2 (figure for preferred p-value will be chosen later) 
	esttab  using "$temp/tables_App/US-CBSAs/p${SL}/TAB_B2_2_LUR.tex", replace b(3) se(4) label compress   r2(3) ///
			stats( N r2 , fmt(%18.3g )   labels(  `"Observations"' `"\(R^{2}\)"' ))  ///   stats(Country_effects IV N r2) 0.* "`City'"
			title("Prime location geography and land use regulation") modelwidth(6) nogap nostar  addnote( "Unit of observation is MSA." )	
			eststo clear				
			
* Effect of height gap on city structure	
	eststo:  reg  PL_count AdjHeight, robust
	eststo:  reg  PL_count AdjHeight lmetro_emp , robust
	eststo:  reg  PL_count AdjHeight lmetro_emp  if metro_emp >= 1000, robust
	
	eststo:  reg  PL_emp_pct AdjHeight, robust
	eststo:  reg  PL_emp_pct AdjHeight lmetro_emp , robust
	eststo:  reg  PL_emp_pct AdjHeight lmetro_emp  if metro_emp >= 1000, robust	
	
* Export Table B.2.3 (figure for preferred p-value will be chosen later) 
	esttab  using "$temp/tables_App/US-CBSAs/p${SL}/TAB_B2_3_AdjustedHeight.tex", replace b(3) se(4) label compress   r2(3) ///
			stats( N r2 , fmt(%18.3g )   labels(  `"Observations"' `"\(R^{2}\)"' ))  ///   stats(Country_effects IV N r2) 0.* "`City'"
			title("Prime location geography and adjusted height") modelwidth(6) nogap nostar  addnote( "Unit of observation is MSA." )	
			eststo clear	
	
* Classify
	gen Type1 = ""
	gen Type2 = ""
	gen Type = ""
	replace Type1 = "Monocentric" if PL_count == 1
	replace Type1 = "Duocentric" if PL_count == 2
	replace Type1 = "Polycentric" if PL_count > 2

* Check monocentricity by city size
	tab Type1 if metro_emp < 1000
	tab Type1 if metro_emp >= 1000

* Cities with more than five prime locations
	tab PL_count if PL_count > 5

* Classify dispersion by size
	sum PL_TS_emp_pct ,d
	sum PL_TS_emp_pct if metro_emp >= 1000 , d 				// Large cities
	local medianTSshare_large = r(p50)
	replace Type2 = "Agglomerated" if PL_TS_emp_pct >= `medianTSshare_large' & metro_emp >= 1000
	replace Type2 = "Dispersed" if PL_TS_emp_pct < `medianTSshare_large' & metro_emp >= 1000
	sum PL_TS_emp_pct if metro_emp < 1000 , d 				// Small cities
	local medianTSshare_small = r(p50)
	replace Type2 = "Agglomerated" if PL_TS_emp_pct >= `medianTSshare_large' & metro_emp < 1000
	replace Type2 = "Dispersed" if PL_TS_emp_pct < `medianTSshare_large' & metro_emp < 1000

* Combine structure variables
	replace Type = Type1+"-"+Type2	

* Sort by employment
	gsort -metro_emp

* Totals
	local obs = _N+2
	set obs `obs'
	foreach var of varlist PL_count PL_area_pct PL_emp_pct  PL_TS_emp{
		sum `var'  if large_CBSA == 1
		replace `var' = r(mean) if _n == _N-1
		sum `var' if large_CBSA == 0
		replace `var' = r(mean) if _n == _N
	}
	replace metro_name = "Mean, employment >= 1M" if  _n == _N-1
	replace metro_name = "Mean, employment < 1M" if  _n == _N
	
* Save file
	keep cbsafp metro_name PL_count PL_area_pct PL_emp_pct PL_*_emp_pct Type metro_area  metro_emp	 

* Final labelling 
	label var metro_name "Metro name"
	label var Type "Metro classification"

* Save MSA data set
	capture mkdir "$temp/US-CBSAs"
	capture mkdir "$temp/US-CBSAs/p${SL}"
	save "$temp/US-CBSAs//p${SL}/metro-data.dta", replace	
	export delimited using "$temp/US-CBSAs/p${SL}/metro-data.csv", replace	
	
* Finalize
	foreach var of varlist  PL_area_pct PL_emp_pct  PL_TS_emp {
		tostring `var', replace force  format(%9.2f)
		replace `var' = `var'+"%" 
	}
	* Drop since TS definition redundant
	capture drop Type_TS
	label var PL_count "# PL"
	label var Type "Classification"
	order metro_name Type PL_count PL_area_pct PL_emp_pct PL_TS_emp_pct metro_area metro_emp
	keep if _n <= 10 | _n >= _N-1
	capture drop cbsafp 
	split metro_name, parse("-")   
	replace metro_name = metro_name1
	capture drop metro_name1-metro_name6
	
* Export Table B.2.7 (figure for preferred p-value will be chosen later) 
	texsave metro_name Type PL_count PL_area_pct PL_emp_pct PL_TS_emp_pct using "$temp/tables_App/US-CBSAs/p${SL}/TAB_B2_7_PL_SUMMARY_B.tex", ///
	title("Summary of spatial structure") size("footnotesize")   width(15cm) align(lccccc)  varlabels replace  frag  ///
	footnote("Agglomerated and dispersed cities are defined by on whether the share of prime locations at total employment is larger or smaller than the median share.")
	
* Script ends

